***************
* Title: gambia_ecd_edcc_figure3.do
* Author: Todd Pugatch
* Description: replication code for Blimpo, Carneiro, Jervis, and Pugatch,
*	"Improving Access and Quality in Early Childhood Development Programs: 
*		Experimental Evidence from The Gambia"
*	for Economic Development and Cultural Change
* Inputs: ECD_3to6_Gambia_cleanv1.dta
* Outputs: gambia_ecd_edcc_figure3.txt, gambia_ecd_edcc_figure3_gc[1-2].[png/emf/eps]
* Notes: creates Figure 3
****************
#delimit;
local start=`"$S_TIME"';
clear;
clear matrix;
clear mata;
graph drop _all;
cap log close;
set more off;
set scheme s1mono;
/*set directory:
	cd mydir
*/
local data=`"Data\cleaned"';
local output=`"analysis\output"';
log using analysis\output\gambia_ecd_edcc_figure3.txt, text replace;

* LOAD AND PREPARE DATA;
qui use `data'\ECD_3to6_Gambia_cleanv1, clear;
/*NEED TO CHECK TREATMENT STATUS OF SETTLEMENT 37028. DISREGARD FOR NOW*/
qui drop if settlement_code==37028;

* define 3 groups:
	--in baseline
	--in endline (original sample)
	--in endline (newly sampled);
qui gen in_baseline=(ip22!=3 & ip22!=.);	
qui gen in_endline=(interview_result==1);
qui gen in_endline_old=(in_endline==1 & in_baseline==1);
qui gen in_endline_new=(in_endline==1 & in_baseline==0);
/*treat unresolved gender mismatches as new to endline*/
qui replace in_endline_new=1 if in_endline_new==0 & child_gender_mismatch_resolved==0;
qui replace in_endline_old=. if in_endline_new==1;

* repeat for having valid MDAT fine motor and language/hearing scores;
/*note that "in sample" defined as having at least one valid MDAT score, not both as in previous analyses of attrition*/
qui gen in_baseline_mdat=(in_baseline==1 & (zfinemotor_baseline!=.|zlanghear_baseline!=.));
qui replace in_baseline_mdat=. if in_endline_new==1;
foreach x in endline endline_old endline_new {;
	qui gen in_`x'_mdat=(in_`x'==1 & (zfinemotor_endline!=.|zlanghear_endline!=.));
};
qui replace in_endline_old_mdat=. if in_endline_new==1;
qui gen in_endline_old_mdat_base=in_endline_old_mdat;
qui replace in_endline_old_mdat_base=. if in_baseline_mdat!=1; /*in baseline MDAT & in endline MDAT*/

/*keep eligibles*/
* keep if baseline age from 3-6 years, or new to endline;
count; 
keep if (child_age_mths_dob>=36 & child_age_mths_dob<84 & in_baseline==1)|(child_age_mths_dob==. & in_baseline==0);

* keep if new to endline and would have been 3-6 at baseline (define as 4-8 at endline to allow errors);
keep if in_endline_new==0|(in_endline_new==1 & selected_child_age>=48 & selected_child_age<=96 & selected_child_age!=.)|
	(in_endline_new==1 & child_gender_mismatch_resolved==0);
	
* define indicators for treatment;
qui gen communitybased=(treatment==6);
qui gen purecontrol=(treatment==1);
qui gen ECDAnnex_control=(treatment==4);
qui gen ECDAnnex_treated=(treatment==5);

* define outcomes (code follows gambia_ecd_results31.do);
* outcomes from full MDAT, normalized by pure control group:
	--normalized by pure control group distribution within survey wave
	--unadjusted and adjusted by age and gender;
foreach y in finemotor langhear {;
	if ("`y'"=="finemotor") local yy "fm";
	else if ("`y'"=="langhear") local yy "lh";
	foreach w in baseline endline {;
		if ("`w'"=="baseline") {;
			local ww "bl";
			local age "child_age_yrs_dob child_age_yrs_dob2";
		};
		else if ("`w'"=="endline") {;
			local ww "el";
			local age "selected_child_age selected_child_age2";
		};
		qui su `y'_`w' if purecontrol==1;
		qui gen z`yy'_full_`ww'=(`y'_`w'-r(mean))/r(sd);
		qui reg `y'_`w'	`age' child_female; /*doesn't matter if Y=raw or z score*/
		qui predict e, residuals; 
		qui su e if purecontrol==1;
		qui gen z`yy'_full_adj_`ww'=(e-r(mean))/r(sd);
		lab var z`yy'_full_`ww' "`y', `w' (normalized to pure control distribution)";
		lab var z`yy'_full_adj_`ww' "`y', `w' (age/gender-adjusted & normalized to pure control distribution)";
		drop e;	
	};
};

* MDAT scores from item response theory: 
*	--age adjusted and normalized by pure control group distribution within survey wave;
foreach y in fm lh {;
	foreach w in baseline endline {;
		qui su ztheta_`y'_adj_`w' if purecontrol==1;
		qui gen z`y'_irt_adj_`w'=(ztheta_`y'_adj_`w'-r(mean))/r(sd);
		lab var z`y'_irt_adj_`w' "`y' ability from item response model (age-adjusted and normalized by pure control), `w'";
	};
};

* keep only children aged 3-6 with valid baseline interview;
qui keep if child_age_mths_dob>=36 & child_age_mths_dob<84 & child_age_mths_dob!=.; 
qui keep if in_baseline==1; 

* omit height-for-age scores topcoded at +/-6;
qui gen haz_dob_baselinex=haz_dob_baseline;
count if haz_dob_baseline==6;
count if haz_dob_baseline==-6;
qui replace haz_dob_baselinex=. if haz_dob_baseline==6|haz_dob_baseline==-6;

* sample sizes (# of children and project sites);
qui egen site=tag(settlement_code);
table treatment, c(freq rawsum site);

* PAIRWISE CORRELATIONS;
local Y "zfm_irt_adj_baseline zlh_irt_adj_baseline haz_dob_baselinex";
local X "pc1 mother_schl_yrs mother_schl_ever mother_readorwrite region2 stimindex_bl";
foreach y in `Y' {;
	di "outcome=`y'";
	corr `y' `X';
};

* create deciles of asset and stimulation indices;
foreach x in pc1 stimindex_bl {;
	qui xtile `x'_decile=`x', n(10);
	lab var `x'_decile "decile of `x'";
};

* GRAPHS;
/*asset index: this version plots means against deciles as 1-10, then finds non-parametric fit through resulting graph*/
* pros: visually nice;
* cons: fitted line is through evenly spaced deciles, not actual data;
* first, get means and 95% CIs by decile;
foreach y in `Y' {;
	qui gen `y'_mu=.;
	qui gen `y'_lb=.;
	qui gen `y'_ub=.;
	forval q=1/10 {;
		qui ci means `y' if pc1_decile==`q';
		qui replace `y'_mu=r(mean) if pc1_decile==`q';
		qui replace `y'_lb=r(lb) if pc1_decile==`q';
		qui replace `y'_ub=r(ub) if pc1_decile==`q';
	};
};
qui egen x=tag(pc1_decile);

/*age*/
* version with means by 6-month age bins;
qui egen agebin=cut(child_age_mths_dob), at(36(6)84) label;
lab def agebin 0 "36" 1 "42" 2 "48" 3 "54" 4 "60" 5 "66" 6 "72" 7 "78" 8 "84", replace;
lab val agebin agebin;
foreach y in zfm_full_bl zlh_full_bl {;
	qui gen `y'_mu=.;
	qui gen `y'_lb=.;
	qui gen `y'_ub=.;
	forval q=0/8 {;
		qui ci means `y' if agebin==`q';
		qui replace `y'_mu=r(mean) if agebin==`q';
		qui replace `y'_lb=r(lb) if agebin==`q';
		qui replace `y'_ub=r(ub) if agebin==`q';
	};
};
qui egen x=tag(agebin) if in_baseline_mdat==1;

* COMPILE GRAPHS FOR DISPLAY;
local g=1;
/*asset index*/
* use version that plots means against deciles as 1-10, then finds non-parametric fit through resulting graph*/
* first, get means and 95% CIs by decile;
foreach y in `Y' {;
	qui gen `y'_mu=.;
	qui gen `y'_lb=.;
	qui gen `y'_ub=.;
	forval q=1/10 {;
		qui ci means `y' if pc1_decile==`q';
		qui replace `y'_mu=r(mean) if pc1_decile==`q';
		qui replace `y'_lb=r(lb) if pc1_decile==`q';
		qui replace `y'_ub=r(ub) if pc1_decile==`q';
	};
};
qui egen x=tag(pc1_decile);
twoway (lpoly zfm_irt_adj_baseline pc1_decile, degree(1) lwidth(thick) lcolor(gray)) 
	(scatter zfm_irt_adj_baseline_mu pc1_decile if x==1, msymbol(Oh) mcolor(black) msize(large) yline(0, lcolor(black)))
	(rspike zfm_irt_adj_baseline_lb zfm_irt_adj_baseline_ub pc1_decile if x==1, lcolor(black)),
	title("Fine motor skills") ytitle("z-score") xtitle("asset index (decile)") xlabel(1(1)10)  
	legend(label(1 "local linear fit") label(2 "mean") label(3 "95% CI") rows(1)) nodraw name(p`g');
local g=`g'+1;	
twoway (lpoly zlh_irt_adj_baseline pc1_decile, degree(1) lwidth(thick) lcolor(gray)) 
	(scatter zlh_irt_adj_baseline_mu pc1_decile if x==1, msymbol(Oh) mcolor(black) msize(large) yline(0, lcolor(black)))
	(rspike zlh_irt_adj_baseline_lb zlh_irt_adj_baseline_ub pc1_decile if x==1, lcolor(black)),
	title("Language and hearing") xtitle("asset index (decile)") xlabel(1(1)10)  
	legend(label(1 "local linear fit") label(2 "mean") label(3 "95% CI") rows(1)) nodraw name(p`g');
local g=`g'+1;
twoway (lpoly haz_dob_baselinex pc1_decile, degree(1) lwidth(thick) lcolor(gray)) 
	(scatter haz_dob_baselinex_mu pc1_decile if x==1, msymbol(Oh) mcolor(black) msize(large) yline(0, lcolor(black)))
	(rspike haz_dob_baselinex_lb haz_dob_baselinex_ub pc1_decile if x==1, lcolor(black)),
	title("Height for age") xtitle("asset index (decile)") xlabel(1(1)10)  
	legend(label(1 "local linear fit") label(2 "mean") label(3 "95% CI") rows(1)) nodraw name(p`g');
local g=`g'+1;
drop *mu *lb *ub x;
	
grc1leg p1 p2 p3, ycommon title("Wealth gradient") rows(1) cols(3) 
	note("Asset index is first principal component of asset ownsership. Outcomes adjusted by age.") name(gc1);
qui graph export `output'\gambia_ecd_edcc_figure3_gc1.png, as(png) replace;
qui graph export `output'\gambia_ecd_edcc_figure3_gc1.emf, as(emf) replace;
qui graph export `output'\gambia_ecd_edcc_figure3_gc1.eps, as(eps) replace;

/*age*/
* version with means by 6-month age bins;
foreach y in zfm_full_bl zlh_full_bl {;
	qui gen `y'_mu=.;
	qui gen `y'_lb=.;
	qui gen `y'_ub=.;
	forval q=0/8 {;
		qui ci means `y' if agebin==`q';
		qui replace `y'_mu=r(mean) if agebin==`q';
		qui replace `y'_lb=r(lb) if agebin==`q';
		qui replace `y'_ub=r(ub) if agebin==`q';
	};
};
qui egen x=tag(agebin) if in_baseline_mdat==1;
twoway (lpoly zfm_full_bl agebin, degree(1) lcolor(gray) lwidth(thick))
	(scatter zfm_full_bl_mu agebin if x==1, msymbol(Oh) msize(large) mcolor(black))
	(rspike zfm_full_bl_lb zfm_full_bl_ub agebin if x==1, lcolor(black)),
	title("Fine motor skills") ytitle("z-score") xtitle("age (months)")  
	xlabel(0 "36" 1 "42" 2 "48" 3 "54" 4 "60" 5 "66" 6 "72" 7 "78")
	legend(label(1 "local linear fit") label(2 "mean") label(3 "95% CI") rows(1)) yline(0, lcolor(black)) nodraw name(p`g');
local g=`g'+1;	
twoway (lpoly zlh_full_bl agebin, degree(1) lcolor(gray) lwidth(thick))
	(scatter zlh_full_bl_mu agebin if x==1, msymbol(Oh) msize(large) mcolor(black))
	(rspike zlh_full_bl_lb zlh_full_bl_ub agebin if x==1, lcolor(black)),
	title("Language and hearing skills") xtitle("age (months)") 
	xlabel(0 "36" 1 "42" 2 "48" 3 "54" 4 "60" 5 "66" 6 "72" 7 "78")
	legend(label(1 "local linear fit") label(2 "mean") label(3 "95% CI") rows(1)) yline(0, lcolor(black)) nodraw name(p`g');
drop *mu *lb *ub x;

grc1leg p4 p5, ycommon title("Age") rows(1) cols(2) 
	note("MDAT score normalized by pure control distribution, but unadjusted by age. Means are binned.") name(gc5);
qui graph export `output'\gambia_ecd_edcc_figure3_gc2.png, as(png) replace;
qui graph export `output'\gambia_ecd_edcc_figure3_gc2.emf, as(emf) replace;
qui graph export `output'\gambia_ecd_edcc_figure3_gc2.eps, as(eps) replace;

local end=`"$S_TIME"'; 
di "`start'";
di "`end'";
log close;
